C/C++ Users Group Library 1996 July

home *** CD-ROM | disk | FTP | other *** search

/ C/C++ Users Group Library 1996 July / C-C++ Users Group Library July 1996.iso / vol_100 / 192_01 / bawk.c < prev next >

Wrap

Text File | 1980-01-01 | 12KB | 640 lines

/* * Bawk main program */ #define MAIN 1 #include <stdio.h> #include "bawk.h" /* * Main program */ main( argc, argv ) int argc; char **argv; { char gotrules, didfile, getstdin; getstdin = didfile = gotrules = 0; /* * Initialize global variables: */ Beginact = Endact = Rules = Rulep = #ifdef DEBUG Debug = #endif Filename = Linecount = Saw_break = 0; Stackptr = Stackbtm - 1; Stacktop = Stackbtm + MAXSTACKSZ; Nextvar = Vartab; strcpy( Fieldsep, " \t" ); strcpy( Recordsep, "\n" ); /* * Parse command line */ while ( --argc ) { if ( **(++argv) == '-' ) { /* * Process dash options. */ switch ( tolower( *(++(*argv)) ) ) { #ifdef DEBUG case 'd': ++Debug; break; #endif case 0: ++getstdin; --argv; goto dosomething; break; default: usage(); } } else { dosomething: if ( gotrules ) { /* * Already read rules file - assume this is * is a text file for processing. */ if ( ++didfile == 1 && Beginact ) doaction( Beginact ); if ( getstdin ) { --getstdin; newfile( 0 ); } else newfile( *argv ); process(); } else { /* * First file name argument on command line * is assumed to be a rules file - attempt to * compile it. */ if ( getstdin ) { --getstdin; newfile( 0 ); } else newfile( *argv ); compile(); gotrules = 1; } } } if ( !gotrules ) usage(); if ( ! didfile ) { /* * Didn't process any files yet - process stdin. */ newfile( 0 ); if ( Beginact ) doaction( Beginact ); process(); } if ( Endact ) doaction( Endact ); } /* * Regular expression/action file compilation routines. */ compile() { /* * Compile regular expressions and C actions into Rules struct, * reading from current input file "Fileptr". */ int c, len; #ifdef DEBUG if ( Debug ) error( "compiling...", 0 ); #endif while ( (c = getcharacter()) != -1 ) { if ( c==' ' || c=='\t' || c=='\n' ) /* swallow whitespace */ ; else if ( c=='#' ) { /* * Swallow comments */ while ( (c=getcharacter()) != -1 && c!='\n' ) ; } else if ( c=='{' ) { #ifdef DEBUG if ( Debug ) error( "action", 0 ); #endif /* * Compile (tokenize) the action string into our * global work buffer, then allocate some memory * for it and copy it over. */ ungetcharacter( '{' ); len = act_compile( Workbuf ); if ( Rulep && Rulep->action ) { Rulep->nextrule = getmem( sizeof( *Rulep ) ); Rulep = Rulep->nextrule; fillmem( Rulep, sizeof( *Rulep ), 0 ); } if ( !Rulep ) { /* * This is the first action encountered. * Allocate the first Rules structure and * initialize it */ Rules = Rulep = getmem( sizeof( *Rulep ) ); fillmem( Rulep, sizeof( *Rulep ), 0 ); } Rulep->action = getmem( len ); movemem( Workbuf, Rulep->action, len ); } else if ( c==',' ) { #ifdef DEBUG if ( Debug ) error( "stop pattern", 0 ); #endif /* * It's (hopefully) the second part of a two-part * pattern string. Swallow the comma and start * compiling an action string. */ if ( !Rulep || !Rulep->pattern.start ) error( "stop pattern without a start", RE_ERROR ); if ( Rulep->pattern.stop ) error( "already have a stop pattern", RE_ERROR ); len = pat_compile( Workbuf ); Rulep->pattern.stop = getmem( len ); movemem( Workbuf, Rulep->pattern.stop, len ); } else { /* * Assume it's a regular expression pattern */ #ifdef DEBUG if ( Debug ) error( "start pattern", 0 ); #endif ungetcharacter( c ); len = pat_compile( Workbuf ); if ( *Workbuf == T_BEGIN ) { /* * Saw a "BEGIN" keyword - compile following * action into special "Beginact" buffer. */ len = act_compile( Workbuf ); Beginact = getmem( len ); movemem( Workbuf, Beginact, len ); continue; } if ( *Workbuf == T_END ) { /* * Saw an "END" keyword - compile following * action into special "Endact" buffer. */ len = act_compile( Workbuf ); Endact = getmem( len ); movemem( Workbuf, Endact, len ); continue; } if ( Rulep ) { /* * Already saw a pattern/action - link in * another Rules structure. */ Rulep->nextrule = getmem( sizeof( *Rulep ) ); Rulep = Rulep->nextrule; fillmem( Rulep, sizeof( *Rulep ), 0 ); } if ( !Rulep ) { /* * This is the first pattern encountered. * Allocate the first Rules structure and * initialize it */ Rules = Rulep = getmem( sizeof( *Rulep ) ); fillmem( Rulep, sizeof( *Rulep ), 0 ); } if ( Rulep->pattern.start ) error( "already have a start pattern", RE_ERROR ); Rulep->pattern.start = getmem( len ); movemem( Workbuf, Rulep->pattern.start, len ); } } endfile(); } /* * Text file main processing loop. */ process() { /* * Read a line at a time from current input file at "Fileptr", * then apply each rule in the Rules chain to the input line. */ int i; #ifdef DEBUG if ( Debug ) error( "processing...", 0 ); #endif Recordcount = 0; while ( getline() ) { /* * Parse the input line. */ Fieldcount = parse( Linebuf, Fields, Fieldsep ); #ifdef DEBUG if ( Debug>1 ) { printf( "parsed %d words:\n", Fieldcount ); for(i=0; i<Fieldcount; ++i ) printf( "<%s>\n", Fields[i] ); } #endif Rulep = Rules; do { if ( ! Rulep->pattern.start ) { /* * No pattern given - perform action on * every input line. */ doaction( Rulep->action ); } else if ( Rulep->pattern.startseen ) { /* * Start pattern already found - perform * action then check if line matches * stop pattern. */ doaction( Rulep->action ); if ( dopattern( Rulep->pattern.stop ) ) Rulep->pattern.startseen = 0; } else if ( dopattern( Rulep->pattern.start ) ) { /* * Matched start pattern - perform action. * If a stop pattern was given, set "start * pattern seen" flag and process every input * line until stop pattern found. */ doaction( Rulep->action ); if ( Rulep->pattern.stop ) Rulep->pattern.startseen = 1; } } while ( Rulep = Rulep->nextrule ); /* * Release memory allocated by parse(). */ while ( Fieldcount ) free( Fields[ --Fieldcount ] ); } } /* * Miscellaneous functions */ parse( str, wrdlst, delim ) char *str; char *wrdlst[]; char *delim; { /* * Parse the string of words in "str" into the word list at "wrdlst". * A "word" is a sequence of characters delimited by one or more * of the characters found in the string "delim". * Returns the number of words parsed. * CAUTION: the memory for the words in "wrdlst" is allocated * by malloc() and should eventually be returned by free()... */ int wrdcnt, wrdlen; char wrdbuf[ MAXLINELEN ], c; wrdcnt = 0; while ( *str ) { while ( instr( *str, delim ) ) ++str; if ( !*str ) break; wrdlen = 0; while ( (c = *str) && !instr( c, delim ) ) { wrdbuf[ wrdlen++ ] = c; ++str; } wrdbuf[ wrdlen++ ] = 0; /* * NOTE: allocate a MAXLINELEN sized buffer for every * word, just in case user wants to copy a larger string * into a field. */ wrdlst[ wrdcnt ] = getmem( MAXLINELEN ); strcpy( wrdlst[ wrdcnt++ ], wrdbuf ); } return wrdcnt; } unparse( wrdlst, wrdcnt, str, delim ) char *wrdlst[]; int wrdcnt; char *str; char *delim; { /* * Replace all the words in "str" with the words in "wrdlst", * maintaining the same word seperation distance as found in * the string. * A "word" is a sequence of characters delimited by